# import libraries
import pandas as pd
import pandas_profiling
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from numpy import log
%matplotlib inline
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import pmdarima as pmd
from pmdarima.arima.utils import ndiffs
#from pmdarima.arima import ADFTest
# Define helper functions
def arimamodel(timeseriesarray, test):
    """Run pmdarima's automatic ARIMA search on a series.

    Args:
        timeseriesarray: The observations handed to ``pmd.auto_arima`` as
            its first positional argument.
        test: Forwarded unchanged as ``auto_arima``'s ``test`` argument
            (per the pmdarima docs, the unit-root test used when choosing
            the differencing order, e.g. ``'adf'`` or ``'kpss'``).

    Returns:
        The object returned by ``pmd.auto_arima``.
    """
    # Start the search at p=0 and q=0; trace=True asks auto_arima to
    # report progress while it runs.
    search_options = {
        'start_p': 0,
        'start_q': 0,
        'test': test,
        'trace': True,
    }
    return pmd.auto_arima(timeseriesarray, **search_options)
# Global matplotlib defaults: figure size (15, 10) at 120 dpi
plt.rcParams['figure.figsize'] = (15, 10)
plt.rcParams['figure.dpi'] = 120
# Read the FluNet CSV export (path relative to the working directory)
# into the dataframe used by the rest of the analysis.
df = pd.read_csv(filepath_or_buffer='FluNetInteractiveReport.csv')
#df.columns.tolist()
# Build a pandas-profiling report for the dataframe and display it
# inline in the notebook.
profile = pandas_profiling.ProfileReport(df, title="Flu Dataset")
profile.to_notebook_iframe()
# Number of specimens columns: Received/Collected, Processed
# Number of influenza A viruses detected by subtype: A (H1), A (H1N1) pdm09, A (H3), A (H5), A (not subtyped), A (total)
# Number of influenza B viruses detected by subtype: B (Yamagata), B (Victoria), B (lineage not determined), B (total)
# Total number of influenza positive
# Total number of influenza negative
# ILI activity
# Notebook cell output (profiling progress):
#   Summarize dataset: 100%|██████████| 36/36 [03:32<00:00, 5.90s/it, Completed]
#   Generate report structure: 100%|██████████| 1/1 [00:43<00:00, 43.49s/it]
#   Render HTML: 100%|██████████| 1/1 [00:22<00:00, 22.44s/it]